********************************************************************************
* 
* Table A9: Estimates to Evaluate the Sensitivity of Results to Controlling for Pre-Trends
* 
********************************************************************************


*******
******* Configuration: directories, run switches, and outcome list
*******

* NOTE: every path below is an absolute, machine-specific location.

* directory that receives every file written by this script
local fpath_output "/homes/nber/shruthi-dua51934/sacarny-DUA51934/shruthi-dua51934/replication_files/output_20230606"

* directory containing the main analytic (hospital-year) file
local fpath_build_output "/disk/agedisk4/medicare.work/sacarny-DUA51934/shruthi-dua51934/replication_files/build/output"

* directory containing the HIMSS supplemental variables
local fpath_himss "/disk/agedisk4/medicare.work/sacarny-DUA51934/shruthi-dua51934/replication_files/himss/input/"

* directory containing the physician flows supplemental data
local fpath_physician_flows "/disk/agedisk4/medicare.work/sacarny-DUA51934/shruthi-dua51934/replication_files/physician_flows/output/"

* replication analysis directory
local fpath_analysis "/disk/agedisk4/medicare.work/sacarny-DUA51934/shruthi-dua51934/replication_files/analysis_20230606/"

*******
******* Run switches: a section executes only when its switch equals 1
*******
local prepare_data 1
local run_bh_test 1
local clean_tables 1




*******
******* Outcome variables: each one is used in turn as the dependent
******* variable of the regression in the BH-test section
*******

local main_vars "discvendcount wcost_entrants wcost_exits wcost_churn logfte lw_capinv_tot ladjcosts_w ldafny_price05 ladjrev_w profit_margin_w"



*******
******* read and assemble the data 
*******
* Loads the hospital-year panel, builds the winsorized/logged outcome
* variables, restricts the sample for the difference-in-difference analysis,
* and creates the post/interim/event-study/trend regressors used later on.
if `prepare_data' == 1{
	
	* bring in the hospital-year panel  
	use "`fpath_build_output'/acq_cleaned_complete_20230606.dta", clear 

	
	* clean up
	label var hospbd "Total beds"

	* reformat urbancbsa variable: 1 if rural 
	* (the within-hospital maximum makes the flag constant over years
	*  before it is flipped with 1 - x)
	bys id: egen urban2 = max(urbancbsa)
	replace urbancbsa = urban2 
	replace urbancbsa = 1 - urbancbsa 
	label var urbancbsa "Share rural hospitals"

	* drop if missing hrrcode
	drop if missing(hrrcode)

	* numeric variable for the aha id 
	egen id2 = group(id)


	* total cost per adjusted discharge 
	* (discharges are scaled up by 1 + outpatient/inpatient charges)
	gen adj_costs = totcost / ((1000000)*ipdischarges_adultped*(1 + (opcharge/ipcharge)))
	label var adj_costs "Adjusted costs per IP discharge (\$1 millions)" 
	* winsorize within year; the result is stored as adj_costs_w
	winsor2 adj_costs, cuts(05 95) by(year)

	* total revenue per adjusted discharge 
	gen adj_revenue = income / ((1000000)*ipdischarges_adultped*(1 + (opcharge/ipcharge)))
	label var adj_revenue "Adjusted revenue per IP discharge (\$1 millions)"
	* winsorize within year; the result is stored as adj_revenue_w
	winsor2 adj_revenue, cuts(05 95) by(year)

	* log costs and revenue 
	* NOTE(review): both labels say "/bed" although the variables are built
	* per adjusted discharge above -- confirm the intended wording.
	gen ladjcosts_w = log((adj_costs_w))
	label var ladjcosts_w "Log(costs/bed)"

	* FIX: the revenue label used to be attached to ladjcosts_w, which
	* overwrote the cost label and left ladjrev_w unlabeled.
	gen ladjrev_w = log((adj_revenue_w))
	label var ladjrev_w "Log(revenue/bed)"

	* Winsorize dafny price index 
	winsor2 dafny_price, cuts(05 95) by(year)
	gen ldafny_price05 = log(dafny_price_w)
	label var ldafny_price05 "log(Dafny price index)"

	* winsorize profit margin in each year
	winsor2 profit_margin, cuts(05 95) by(year) label

	* capital investment 
	replace capinv_tot = capinv_tot/1000000
	label var capinv_tot "Capital investment \$millions"
	
	* winsorize and log  capital investment
	winsor2 capinv_tot, cuts(05 95) by(year) label
	gen lw_capinv_tot = log(1+capinv_tot_w)
	label var lw_capinv_tot "log(1+winsorized capital investment)"


	* scale FTE counts by beds 
	gen fte_per_bed = fte/hospbd
	label var fte_per_bed  "Full time employees per bed"

	* create log(fte)
	gen logfte = log(fte)
	label var logfte "log(FTE)"
	label var fte "Full Time Employees"


	********
	******** for the difference-in-difference analysis
	******** 
	**  restrict the sample: Legacy, Target, Other forprofits 
	keep if forprofit == 1 
	drop if target2 == 1 | acq_other == 1 

	label var ind08_11acq_legacy "Acquirer * 08-11"
	label var ind12_14target "Target * 12-14"

	* define post-treatment 
	gen postm = ( year >= 2008)
	gen postm_target = postm * target 
	gen postm_legacy = postm * acq_legacy 
	label var postm_target "Post 2008 * Target"
	label var postm_legacy "Post 2008 * Acquirer"

	* define interim
	gen interim = (year == 2007)
	gen interim_target = interim * target
	gen interim_legacy = interim * acq_legacy

	label var interim_target "Target 2007"
	label var interim_legacy "Acquirer 2007"

	* sanity check: nothing excluded above may remain in the sample
	assert target2 + acq_other == 0 

	* event study
	* generate year dummies (yr1 is the first distinct year, yr2 the next, ...)
	tab year, gen(yr)

	*interact year dummies with acq legacy/ target indicators  
	foreach v of varlist yr* {
		gen target_`v' = target * `v'	
		gen legacy_`v' = acq_legacy * `v'

	}	

	* clean up the labels on group * year interactions 
	* The dummy index is turned into a calendar year by adding 2002.
	* NOTE(review): this assumes the panel starts in 2003 with no missing
	* years -- confirm against the data.
	foreach v of varlist target_yr* {
		local num = subinstr("`v'", "target_yr", "", 1) 
		local num = `num' + 2002
		label var `v' "Target * `num'"
	}


	foreach v of varlist legacy_yr* {
		local num = subinstr("`v'", "legacy_yr", "", 1) 
		local num = `num' + 2002
		label var `v' "Acquirer * `num'"
	}	

	* drop dummy for 2006 (t-1)
	* (removes yr4 and both of its group interactions)
	drop *yr4

	* gen year * hrr FEs
	egen yrhrr = group(year hrrcode)

	* gen year * HRR trend
	gen yr_hrr_trend = year * hrrcode
	
	* create a legacy and target trend 
	gen acquirer_trend = year * acq_legacy 
	gen target_trend = year * target 
}


******
******  run the BH test from Adam's code 
******
* Stacks two copies of the data (stack==0: base specification, stack==1:
* specification that adds group-specific linear trends), estimates both in one
* regression per outcome, and reports
*   B_    = average of the post-period coefficients in the base copy,
*   G_    = average of the post-period coefficients in the trend copy,
*   theta = B_ - G_,
* separately for targets and acquirers (legacy).
if `run_bh_test' == 1{
	
	* remove output left over from an earlier run; `cap' ignores a missing file
	* NOTE(review): the outreg2 calls below give neither replace nor append,
	* so this presumably keeps them from adding to stale results -- confirm.
	cap rm "`fpath_output'/bh_regression_estimatesv2.txt"
	cap rm "`fpath_output'/acquirer_thetav2.txt"
	cap rm "`fpath_output'/target_thetav2.txt"	
	
	* duplicate every observation; stack is 0 for one copy and 1 for the other
	gen orig_n = _n
	expand 2
	egen stack = seq(), from(0) by(orig_n)

	* copy-specific versions of each regressor: Xbase is non-zero only in
	* stack 0, Xtrend only in stack 1
	foreach var of varlist postm_legacy postm_target interim_legacy interim_target acquirer_trend target_trend {
		gen `var'Xbase = `var'*(stack==0)
		gen `var'Xtrend = `var'*(stack==1)
	}

	* post-period years whose coefficients are averaged below
	local post_years 2008 2009 2010 2011 2012 2013 2014

	* group * year indicators, again one set per copy
	foreach y of local post_years {
		gen legacyXbaseX`y' = (acq_legacy == 1)*(year == `y')*(stack==0)
		gen legacyXtrendX`y' = (acq_legacy == 1)*(year == `y')*(stack==1)
		
		gen targetXbaseX`y' = (target== 1)*(year == `y')*(stack==0)
		gen targetXtrendX`y' = (target == 1)*(year == `y')*(stack==1)
		
	}
	
	* Build each sum of post-period coefficients exactly once from the year
	* list.  FIX: the hand-written sums for B_ and G_ used to contain the 2010
	* term twice, so they were not the averages they claim to be and did not
	* agree with theta.
	foreach grp in target legacy {
		foreach spec in base trend {
			local terms
			local plus
			foreach y of local post_years {
				local terms `terms' `plus' `grp'X`spec'X`y'
				local plus "+"
			}
			local sum_`grp'_`spec' `terms'
		}
	}
	
	
	foreach v in `main_vars' {
		di "****** Variable: `v'" 
		preserve
		
		** number of post years
		local N : word count `post_years'
		
		** vendor count
		if "`v'" == "discvendcount"{ 
			* bring in the vendor distance info 
			merge m:1 id year using "`fpath_himss'/vendor_distance_indiv2003_2014_v2.dta", keep(match) nogen
			gen discvendcount = distlegacy^2
		}
	
		** drop 2008 if physician flows variable 
		* NOTE(review): the 2008 interactions are then all zero; the averages
		* below are taken over the remaining post years, assuming the estimator
		* reports those terms as omitted (zero) -- confirm.
		if inlist("`v'", "wcost_entrants", "wcost_exits", "wcost_churn"){
			merge m:1 id year using "`fpath_physician_flows'/physician_flows_data2003_2014", keepusing(wpat* wcost* *_sh nphyshosp) keep(match) nogen 
			drop if year == 2008
			local N = `N' - 1
		}
	
		** drop 2003 for capital investment results 
		if "`v'" == "lw_capinv_tot" {
			drop if year == 2003
		}
	
		** outcomes that are not estimated here
		* FIX: restore before skipping, otherwise the next iteration's
		* preserve fails because the data are still preserved.
		if inlist("`v'", "shdx", "rhdx") {	
			restore
			continue 
		}
	
		** run the regressions		
		local varlab: variable label `v'
		
		** specification from Adam 
		* base-copy regressors first, then trend-copy regressors plus the
		* group-specific linear trends; fixed effects are copy-specific
		reghdfe `v' ///
		interim_legacyXbase interim_targetXbase ///
		legacyXbaseX* targetXbaseX* ///
		interim_legacyXtrend interim_targetXtrend ///
		legacyXtrendX* targetXtrendX* ///
		acquirer_trendXtrend target_trendXtrend ///
		if forprofit == 1, ///
		absorb(i.year#i.stack i.id2#i.stack) vce(cluster id2)
		
		outreg2 using "`fpath_output'/bh_regression_estimatesv2.txt", label(proper) ctitle(`varlab')  
		
		*And we'd get B_ from the following
		lincom (`sum_target_base')/`N'
		local B_target = `r(estimate)'
		local B_target_se = `r(se)'
		local B_target_pval = `r(p)'
		
		lincom (`sum_legacy_base')/`N'
		local B_legacy = `r(estimate)'
		local B_legacy_se = `r(se)'	
		local B_legacy_pval = `r(p)'

		*And G_ from
		lincom (`sum_target_trend')/`N'
		local G_target = `r(estimate)'
		local G_target_se = `r(se)'
		local G_target_pval = `r(p)'

		lincom (`sum_legacy_trend')/`N'
		local G_legacy = `r(estimate)'
		local G_legacy_se = `r(se)'
		local G_legacy_pval = `r(p)'
		
		*And Theta from
		* (r(estimate), r(se), r(p) in the outreg2 call refer to this lincom)
		lincom ((`sum_target_base')/`N') - ((`sum_target_trend')/`N') 
		
		outreg2 using "`fpath_output'/target_thetav2.txt", addstat("Estimate of theta", `r(estimate)', "Std Error", `r(se)', "Theta p-value", `r(p)', "Estimate of B_ Target", `B_target', "SE of B_ Target", `B_target_se', "Estimate of G_ Target", `G_target', "SE of G_ Target", `G_target_se', "B_ Target p-val", `B_target_pval', "B_ Legacy p-val", `B_legacy_pval', "G_ Target p-val", `G_target_pval', "G_ Legacy p-val", `G_legacy_pval') label(proper) ctitle(`v') title("Target Estimates")  
		
		lincom ((`sum_legacy_base')/`N') - ((`sum_legacy_trend')/`N') 
		
		outreg2 using "`fpath_output'/acquirer_thetav2.txt", addstat("Estimate of theta", `r(estimate)', "Std Error", `r(se)', "Theta p-value", `r(p)', "Estimate of B_ Legacy", `B_legacy', "SE of B_ Legacy", `B_legacy_se', "Estimate of G_ Legacy", `G_legacy', "SE of G_ Legacy", `G_legacy_se', "B_ Target p-val", `B_target_pval', "B_ Legacy p-val", `B_legacy_pval', "G_ Target p-val", `G_target_pval', "G_ Legacy p-val", `G_legacy_pval') label(proper) ctitle(`v') title("Legacy Estimates")  
		
		* undo the outcome-specific merges and sample restrictions
		restore 
	}
		
}






* Turns the text files written by the BH-test section into the final
* workbooks: the full regression output is copied as-is, and the theta / B_ /
* G_ statistics for acquirers and targets are reshaped to one row per outcome
* with starred estimates and parenthesized standard errors.
if `clean_tables' == 1{

	* ---- full regression output: copy straight into a workbook ----
	insheet using "`fpath_output'/bh_regression_estimatesv2.txt", clear 
	export excel using "`fpath_output'/REStat_revision_tables.xls", sheet("Bilinski-Hatfield regression", replace) 

	* ---- acquirer (legacy) statistics ----
	insheet using "`fpath_output'/acquirer_thetav2.txt", clear 

	* keep the header row plus the added statistics only
	keep if regexm(v1, "p-val") | inlist(v1, "Estimate of theta", "Std Error", "Theta p-value", "VARIABLES", "Estimate of B_ Legacy", "SE of B_ Legacy", "Estimate of G_ Legacy", "SE of G_ Legacy")

	* round trip through a scratch file, then transpose so that each outcome
	* becomes a row (presumably the re-import turns the header row into
	* variable names and the statistics into numbers -- confirm)
	export delim using "`fpath_output'/tmp.txt", novarnames replace 
	import delim using "`fpath_output'/tmp.txt", clear 
	xpose, clear varname 
	drop if missing(v1)

	rename (v1 v2 v3 v4 v5 v6 v7 v8 v9 v10 v11) ///
		(acq_theta acq_se acq_pval b_legacy b_legacy_se g_legacy g_legacy_se b_target_pval b_legacy_pval g_target_pval g_legacy_pval)
	rename _varname variable 
	order variable *theta *se *pval 

	* wrap every standard error in parentheses
	gen open = "("
	gen close = ")"
	foreach s in acq_se b_legacy_se g_legacy_se {
		egen `s'2 = concat(open `s' close)
		drop `s'
		rename `s'2 `s'
	}
	drop open close 

	* append significance stars: * p<=.1, ** p<=.05, *** p<=.01
	foreach est in acq_theta b_legacy g_legacy {
		local p "`est'_pval"
		if "`est'" == "acq_theta" local p "acq_pval"

		gen asterisk = cond(`p' <= .01, "***", cond(`p' <= .05, "**", cond(`p' <= .1, "*", "")))
		egen `est'_v2 = concat(`est' asterisk)
		drop `est' asterisk
		rename `est'_v2 `est'
	}

	order variable acq_theta acq_se acq_pval b* g*

	* park the acquirer results until the target results are ready
	tempfile acq_theta
	save `acq_theta', replace 


	* ---- target statistics (same steps as above) ----
	insheet using "`fpath_output'/target_thetav2.txt", clear 
	keep if regexm(v1, "p-val") | inlist(v1, "Estimate of theta", "Std Error", "Theta p-value", "VARIABLES", "Estimate of B_ Target", "SE of B_ Target", "Estimate of G_ Target", "SE of G_ Target")

	export delim using "`fpath_output'/tmp.txt", novarnames replace 
	import delim using "`fpath_output'/tmp.txt", clear 
	xpose, clear varname 
	drop if missing(v1)

	rename (v1 v2 v3 v4 v5 v6 v7 v8 v9 v10 v11) ///
		(target_theta target_se target_pval b_target b_target_se g_target g_target_se b_target_pval b_legacy_pval g_target_pval g_legacy_pval)
	rename _varname variable 
	order variable *theta *se *pval 

	gen open = "("
	gen close = ")"
	foreach s in target_se b_target_se g_target_se {
		egen `s'2 = concat(open `s' close)
		drop `s'
		rename `s'2 `s'
	}
	drop open close 

	foreach est in target_theta b_target g_target {
		local p "`est'_pval"
		if "`est'" == "target_theta" local p "target_pval"

		gen asterisk = cond(`p' <= .01, "***", cond(`p' <= .05, "**", cond(`p' <= .1, "*", "")))
		egen `est'_v2 = concat(`est' asterisk)
		drop `est' asterisk
		rename `est'_v2 `est'
	}

	order variable target_theta target_se target_pval b* g*

	* ---- combine both sides, one row per outcome ----
	merge 1:1 variable using `acq_theta'
	drop _merge *pval 
	rename variable depvar 
	order depvar b_legacy b_legacy_se g_legacy g_legacy_se acq_theta acq_se b_target b_target_se  g_target g_target_se target_theta target_se 

	* column headers of the exported sheet
	label var b_legacy "B_ Acquirer"
	label var b_legacy_se "SE of B_ Legacy"
	label var g_legacy "G_ Acquirer"
	label var g_legacy_se "SE of G_ Legacy"
	label var acq_theta "Acquirer theta estimate"
	label var acq_se "Acquirer std error"
	label var b_target "B_ Target"
	label var b_target_se "SE of B_ Target"
	label var g_target "G_ Target"
	label var g_target_se "SE of G_ Target" 
	label var target_theta "Target theta estimate"
	label var target_se "Target std error"

	* readable row names in place of the variable names
	replace depvar = "Discordant vendor count" if depvar == "discvendcount"
	replace depvar = "Log of costs per inpatient" if depvar == "ladjcosts_w"
	replace depvar = "Log of revenue per inpatient" if depvar == "ladjrev_w"
	replace depvar = "Log of price index" if depvar == "ldafny_price05"
	replace depvar = "Log of FTE" if depvar == "logfte"
	replace depvar = "Log of 1 + capital investment" if depvar == "lw_capinv_tot"
	replace depvar = "Profit margin" if depvar == "profit_margin_w"
	replace depvar = "Readmission" if depvar == "rhdx"
	replace depvar = "Survival" if depvar == "shdx"
	replace depvar = "Physician churn rate" if depvar == "wcost_churn"
	replace depvar = "Physician entry rate" if depvar == "wcost_entrants"
	replace depvar = "Physician exit rate" if depvar == "wcost_exits"

	export excel using "`fpath_output'/TableA9.xls", sheet("Bilinski-Hatfield theta est", replace) firstrow(varlabel)

}




